// Documentation: https://github.com/hooke007/MPV_lazy/wiki/4_GLSL

// CuNNy 2x12 DS
// Copyright (c) 2024 funnyplanter

// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 3.0 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this program.  If not, see <https://www.gnu.org/licenses/>.
/* ------------------------------------------------------------------- */


//!DESC [CuNNy_2x12_DS] -in
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND LUMA
//!SAVE in
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// First pass ("in"): reads the bound LUMA plane and saves a 4-component
// texture named "in" that is three times as wide as LUMA and equally tall.
// The WHEN expression above is written in reverse Polish notation: the pass
// runs only when OUTPUT exceeds 1.2x the LUMA size in both width and height.

// Select the scalar/vector/matrix types used by this pass: 16-bit float types
// when the explicit-arithmetic-types extension is defined, otherwise the
// regular float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0(x, y): fetches the LUMA texel at pos + (x, y), with the coordinate clamped
// to [0, sz], scales it by LUMA_mul and yields its red channel converted to F.
// Expects `pos` and `sz` to be in scope where the macro is expanded.
#define l0(x, y) F((LUMA_mul * texelFetch(LUMA_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(1, 1) + ivec2(0, 0), 0)).r)
// Workgroup-shared tile of input samples: one plane of 10x10 entries.
shared F G[1][10][10];
// Entry point of the "in" pass. Each invocation combines the 3x3 neighbourhood
// of LUMA samples around its pixel with constant weights and writes three
// 4-component results to out_image.
void hook() {
	// Position of this invocation inside its workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Input pixel handled by this invocation (8x8 pixels per workgroup).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// First of the three horizontally adjacent output texels for this pixel.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid LUMA coordinate; l0 clamps its fetch position to it.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared 10x10 tile. Each invocation writes the entries at
	// offsets 0 and 8 from its own position on each axis, skipping those that
	// fall outside the tile. Because of the -1 shift, tile entry [ay][ax]
	// holds the sample one pixel up and to the left of workgroup pixel (ax, ay).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
		}
	}
	// Wait until every invocation in the workgroup has written its entries.
	barrier();
	// s0_R_C is the tile sample at row xy.y + R, column xy.x + C, so s0_1_1 is
	// this invocation's own pixel and the others are its eight neighbours.
	F s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2;
	// Three 4-component accumulators (twelve output values in total).
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2];
	// Weighted sum: for each of the nine taps, add (constant weights * sample)
	// to each accumulator.
	r0 += V4(-5.191e-02, 2.455e-02, 3.272e-01, 2.582e-02) * s0_0_0;
	r1 += V4(-4.211e-02, -7.496e-04, -2.097e-01, 1.265e-02) * s0_0_0;
	r2 += V4(-7.447e-02, 1.310e-02, -8.455e-02, 1.625e-02) * s0_0_0;
	r0 += V4(-2.028e-01, -5.257e-02, 1.477e-01, -5.079e-02) * s0_0_1;
	r1 += V4(4.814e-01, -8.497e-01, -2.609e-01, 2.017e-02) * s0_0_1;
	r2 += V4(-8.010e-02, -3.924e-02, -2.022e-02, -1.350e-01) * s0_0_1;
	r0 += V4(2.706e-01, 2.162e-02, -9.736e-03, 2.180e-02) * s0_0_2;
	r1 += V4(4.432e-01, 8.543e-01, 4.775e-01, -3.009e-02) * s0_0_2;
	r2 += V4(-3.725e-02, -6.188e-02, 4.952e-01, -9.321e-02) * s0_0_2;
	r0 += V4(5.377e-01, -8.473e-02, 6.348e-01, -1.252e-01) * s0_1_0;
	r1 += V4(-2.185e-03, 3.782e-03, -1.004e-01, -5.375e-02) * s0_1_0;
	r2 += V4(-2.385e-01, -1.786e-02, -1.480e-02, 2.699e-02) * s0_1_0;
	r0 += V4(-9.434e-01, -8.926e-01, -1.007e+00, -8.675e-01) * s0_1_1;
	r1 += V4(-9.980e-01, -5.412e-02, -4.424e-01, -1.035e+00) * s0_1_1;
	r2 += V4(1.031e+00, -7.735e-02, -2.289e-02, 6.952e-01) * s0_1_1;
	r0 += V4(4.494e-03, -1.101e-01, -1.018e-01, -1.142e-01) * s0_1_2;
	r1 += V4(1.165e-01, 7.109e-02, 6.309e-01, 1.087e+00) * s0_1_2;
	r2 += V4(-1.140e-01, -8.667e-02, -1.041e-01, -3.534e-01) * s0_1_2;
	r0 += V4(3.375e-01, 6.424e-02, 1.994e-02, 9.097e-02) * s0_2_0;
	r1 += V4(3.208e-02, 1.428e-03, 2.434e-02, 2.824e-02) * s0_2_0;
	r2 += V4(-5.633e-02, 5.896e-02, -5.889e-03, 3.063e-02) * s0_2_0;
	r0 += V4(2.049e-01, 9.394e-01, -1.221e-01, 9.395e-01) * s0_2_1;
	r1 += V4(-7.108e-03, -8.539e-03, -1.112e-01, -2.324e-02) * s0_2_1;
	r2 += V4(-2.983e-01, -1.784e-01, -4.995e-02, -5.822e-02) * s0_2_1;
	r0 += V4(-1.770e-01, 9.111e-02, 1.068e-01, 8.840e-02) * s0_2_2;
	r1 += V4(-2.290e-02, -1.237e-02, 7.306e-03, -3.704e-03) * s0_2_2;
	r2 += V4(-1.217e-01, 5.689e-01, -2.592e-02, -6.608e-03) * s0_2_2;
	// For each accumulator: add its constant offset, clamp to [0, 1], and
	// store it as a vec4 in output texels opos + (0..2, 0).
	r0 += V4(-4.064e-03, -7.491e-03, -4.980e-04, 1.221e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(9.808e-04, 1.763e-02, 2.270e-03, -5.116e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(3.953e-03, 8.306e-03, 1.239e-02, 4.828e-03);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_2x12_DS] -conv1
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND in
//!BIND LUMA
//!SAVE conv1
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "conv1": reads the texture saved as "in" and saves a 4-component
// texture named "conv1" that is three times as wide as LUMA and equally tall.
// The WHEN expression above is written in reverse Polish notation: the pass
// runs only when OUTPUT exceeds 1.2x the LUMA size in both width and height.

// Select the scalar/vector/matrix types used by this pass: 16-bit float types
// when the explicit-arithmetic-types extension is defined, otherwise the
// regular float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): clamp pos + (x, y) to [0, sz], multiply the x coordinate by
// three, and fetch texel 0, 1 or 2 of that horizontal triple from "in", scaled
// by in_mul and converted to V4. Expect `pos` and `sz` to be in scope where
// the macros are expanded.
#define l0(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((in_mul * texelFetch(in_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile of input samples: three planes of 10x10 V4 entries,
// one plane per macro above.
shared V4 G[3][10][10];
// Entry point of the "conv1" pass. Each invocation combines the 3x3
// neighbourhood of its pixel in all three input planes with constant 4x4
// weight matrices and writes three 4-component results to out_image.
void hook() {
	// Position of this invocation inside its workgroup.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	// Input pixel handled by this invocation (8x8 pixels per workgroup).
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// First of the three horizontally adjacent output texels for this pixel.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid pixel coordinate (in LUMA units); l0/l1/l2 clamp to it
	// before scaling x by three.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Fill the shared 10x10 tile for all three planes. Each invocation writes
	// the entries at offsets 0 and 8 from its own position on each axis,
	// skipping those that fall outside the tile. Because of the -1 shift, tile
	// entry [ay][ax] holds the sample one pixel up and to the left of
	// workgroup pixel (ax, ay).
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// Wait until every invocation in the workgroup has written its entries.
	barrier();
	// sP_R_C is the tile sample at row xy.y + R, column xy.x + C. s0_* first
	// holds plane 0 and is later reloaded with plane 2; s1_* holds plane 1.
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	// Three 4-component accumulators (twelve output values in total).
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 taps: add (constant 4x4 matrix * sample) to each accumulator.
	r0 += M4(-4.158e-02, -1.316e-01, 9.439e-02, 3.049e-01, 9.432e-02, -7.568e-02, 2.066e-01, -7.366e-01, 1.310e-01, 8.916e-02, 8.797e-02, -1.581e-01, -1.316e-01, 2.534e-01, -7.707e-02, 3.548e-01) * s0_0_0;
	r1 += M4(2.455e-01, 1.012e-02, 1.590e-01, 2.719e-02, -7.850e-01, -1.769e-01, 3.350e-01, -4.040e-01, -1.411e-01, 1.728e-02, -6.952e-02, 1.675e-02, 5.570e-01, 1.946e-01, -5.703e-01, 3.603e-01) * s0_0_0;
	r2 += M4(-1.561e-01, -2.455e-01, -1.595e-02, -4.196e-01, -5.997e-01, 5.946e-01, -2.861e-02, 6.705e-01, 5.627e-02, 9.637e-02, 1.782e-02, 9.602e-02, 3.506e-01, -3.833e-01, 2.417e-02, -6.764e-01) * s0_0_0;
	r0 += M4(-1.632e-01, -4.619e-02, 9.993e-02, 1.357e-01, -1.000e+00, 8.417e-01, -1.000e+00, -3.340e-02, 4.012e-03, 3.537e-02, 1.499e-02, 1.425e-02, 1.000e+00, -3.113e-01, -1.000e+00, 8.318e-01) * s0_0_1;
	r1 += M4(2.739e-01, -1.635e-02, 1.071e-01, 6.431e-02, -2.360e-02, -2.964e-01, -1.000e+00, 1.000e+00, 7.420e-03, -1.171e-02, 3.875e-03, -6.862e-02, 3.882e-01, 5.342e-01, 1.000e+00, -1.000e+00) * s0_0_1;
	r2 += M4(1.969e-01, -1.257e-01, -2.033e-01, -5.647e-01, 4.023e-02, 1.000e+00, -4.620e-01, 1.860e-01, -1.344e-01, -3.099e-03, -2.266e-02, 2.443e-01, -7.086e-01, -8.007e-01, 7.871e-01, 2.470e-01) * s0_0_1;
	r0 += M4(-6.202e-01, 9.048e-02, 9.654e-02, 5.114e-01, -7.700e-01, 7.324e-01, -6.035e-01, 5.723e-01, -1.515e-01, -4.553e-04, -2.397e-01, -5.006e-03, 2.085e-01, -6.118e-01, 2.854e-01, -1.000e+00) * s0_0_2;
	r1 += M4(2.371e-01, -5.475e-02, -4.546e-04, 2.144e-02, 8.886e-01, -1.120e-01, 4.069e-01, -4.577e-01, 1.890e-01, 1.529e-01, 5.826e-02, 4.934e-02, -6.513e-01, 1.987e-01, -2.510e-01, 4.029e-01) * s0_0_2;
	r2 += M4(2.077e-01, -1.540e-01, -4.109e-02, -8.185e-01, -5.097e-01, -3.961e-01, 2.292e-01, -8.190e-02, -1.320e-01, 1.790e-01, 4.937e-02, 3.174e-01, 3.466e-01, 5.202e-01, -1.135e-01, 4.462e-01) * s0_0_2;
	r0 += M4(9.283e-02, -5.878e-02, 1.877e-02, 2.998e-01, -1.365e-01, 2.248e-01, 4.043e-01, -1.000e+00, -9.641e-02, 3.521e-02, -1.158e-01, -3.418e-01, 3.076e-01, -4.072e-01, -2.961e-01, 3.740e-01) * s0_1_0;
	r1 += M4(9.579e-02, 1.033e-01, -9.118e-02, 2.594e-01, -8.669e-01, -4.571e-02, -6.309e-01, 8.590e-02, -1.519e-01, 1.153e-02, -9.937e-02, -9.670e-02, 3.474e-01, 1.623e-01, 6.998e-01, -2.052e-01) * s0_1_0;
	r2 += M4(9.698e-02, -5.533e-02, -1.161e-01, -4.581e-01, -6.531e-02, 9.734e-02, -5.414e-01, 2.439e-02, -3.978e-02, 1.156e-02, -2.107e-02, 2.486e-01, 4.825e-02, -3.506e-01, 6.660e-01, -5.989e-02) * s0_1_0;
	r0 += M4(3.400e-01, -4.397e-01, 2.182e-01, -3.762e-02, 1.000e+00, 8.180e-01, -1.242e-01, -6.621e-01, 4.303e-01, -1.527e-01, 1.907e-01, -1.584e-01, -8.074e-01, -4.756e-01, -8.380e-02, 3.837e-01) * s0_1_1;
	r1 += M4(1.179e-01, 1.496e-01, 2.410e-01, -3.918e-02, -9.486e-01, -3.990e-01, 5.762e-01, -2.937e-01, -3.131e-01, 4.590e-02, 3.001e-01, -1.493e-01, -4.043e-01, 6.512e-01, 7.132e-03, 5.589e-01) * s0_1_1;
	r2 += M4(2.822e-01, -6.270e-01, -1.371e-01, -6.121e-01, -2.451e-01, 3.928e-01, 5.411e-01, -6.847e-01, 1.888e-02, -1.842e-01, -2.822e-02, 2.114e-01, 7.295e-02, -4.501e-01, 9.732e-02, 3.705e-01) * s0_1_1;
	r0 += M4(-8.541e-02, 3.243e-01, -3.075e-01, 3.406e-01, -2.819e-01, -1.000e+00, -2.956e-01, 4.043e-01, -3.804e-01, 1.163e-01, -8.184e-01, -5.602e-02, 3.857e-01, 3.783e-01, 3.133e-01, -2.883e-01) * s0_1_2;
	r1 += M4(4.300e-01, -8.127e-02, -4.553e-02, 8.671e-02, 3.686e-01, 1.597e-01, -2.093e-01, 4.398e-01, 1.708e-02, 2.636e-01, -6.311e-02, 3.389e-01, -9.055e-03, -1.270e-01, 2.407e-01, -1.764e-01) * s0_1_2;
	r2 += M4(4.654e-02, -2.853e-01, -2.918e-01, -4.170e-01, -4.953e-02, -4.904e-01, -2.494e-01, 3.462e-02, 2.806e-01, 2.510e-01, 3.127e-02, 6.227e-01, 2.207e-01, 3.621e-01, 3.213e-01, -1.962e-02) * s0_1_2;
	r0 += M4(1.272e-01, 9.297e-02, 1.462e-01, 2.517e-01, -1.859e-01, -1.370e-01, -1.690e-01, 3.838e-01, -7.946e-02, 2.639e-02, -7.796e-02, -3.661e-01, 2.139e-01, -7.836e-02, 1.294e-01, -5.137e-01) * s0_2_0;
	r1 += M4(2.853e-01, 7.496e-03, 6.427e-02, 1.874e-02, -2.728e-02, -1.500e-02, 7.914e-02, -1.110e-01, -3.669e-01, 1.439e-02, 4.820e-02, -1.963e-01, -1.544e-01, -3.508e-02, -5.097e-02, 3.956e-02) * s0_2_0;
	r2 += M4(8.859e-02, -1.196e-01, 1.417e-02, -3.708e-01, -7.901e-02, -1.628e-01, 1.818e-02, 3.073e-01, -1.593e-01, 9.756e-02, -5.366e-03, 5.451e-01, 6.663e-02, 1.867e-01, -8.631e-03, -1.637e-01) * s0_2_0;
	r0 += M4(-2.657e-02, 1.375e-01, -8.419e-02, 2.922e-01, 2.855e-01, -5.619e-01, 9.826e-02, -7.107e-01, -1.003e-01, 2.726e-01, 8.143e-02, -6.754e-01, -2.947e-01, 2.392e-01, 1.237e-01, 4.752e-01) * s0_2_1;
	r1 += M4(3.127e-01, -3.963e-02, -9.806e-02, 8.422e-02, -2.880e-01, 7.747e-02, -1.188e-02, 3.563e-01, -1.000e+00, 2.115e-01, -1.294e-01, 2.702e-01, 1.447e-01, 5.561e-02, 3.619e-02, -1.889e-01) * s0_2_1;
	r2 += M4(-1.991e-01, -9.243e-02, -2.456e-01, -5.137e-01, 9.885e-02, 2.773e-01, -1.550e-01, 5.200e-02, -1.057e-01, 5.033e-01, 1.324e-01, 8.657e-01, 3.283e-02, -1.871e-01, 3.154e-01, 7.551e-02) * s0_2_1;
	r0 += M4(-1.459e-02, -2.882e-01, 3.780e-02, 1.416e-01, -2.251e-01, -1.000e+00, 2.274e-01, 3.447e-01, 2.269e-01, -8.783e-01, 3.950e-01, -1.438e-01, 4.041e-01, -3.814e-01, -1.489e-02, -3.760e-01) * s0_2_2;
	r1 += M4(1.169e-02, -2.645e-02, -1.340e-01, -7.157e-02, 2.605e-01, 1.596e-01, 1.587e-01, 1.253e-01, -4.452e-02, 1.125e-01, 2.607e-01, 3.309e-01, -4.593e-01, -1.399e-01, -7.832e-02, 3.090e-03) * s0_2_2;
	r2 += M4(-1.629e-01, -1.723e-01, -8.555e-02, -4.732e-01, -1.102e-01, -8.186e-01, 9.014e-02, -3.495e-01, 6.515e-01, 3.197e-01, 1.220e-01, 5.079e-01, 2.358e-01, 6.951e-01, -2.782e-02, 2.386e-01) * s0_2_2;
	// Plane 1 taps.
	r0 += M4(-1.255e-01, 2.810e-02, -2.339e-01, 2.495e-01, -1.071e-01, 2.540e-01, -1.070e-02, 1.872e-02, -1.527e-01, -1.665e-01, -1.556e-01, -1.912e-01, 2.817e-01, 3.521e-01, 2.858e-02, 1.392e-01) * s1_0_0;
	r1 += M4(2.240e-01, -3.490e-02, -1.544e-02, -1.617e-02, 9.603e-02, -2.305e-02, 7.829e-03, -2.875e-02, -2.229e-01, 1.409e-01, 3.010e-02, 5.489e-02, -1.654e-01, -1.674e-01, 2.630e-01, 7.122e-03) * s1_0_0;
	r2 += M4(3.046e-02, -1.017e-01, -4.129e-02, -1.829e-01, -1.328e-01, -7.445e-02, -4.520e-02, -1.841e-01, 1.407e-01, 9.053e-02, 5.191e-02, 2.038e-01, -1.543e-01, 4.325e-02, -8.025e-02, -5.300e-01) * s1_0_0;
	r0 += M4(-2.530e-01, 5.568e-02, -1.015e-01, 6.279e-02, -3.099e-02, 9.638e-02, -2.181e-02, -1.415e-01, 1.030e-01, -8.183e-02, -3.021e-02, 1.387e-01, -3.020e-01, 5.472e-01, -1.000e+00, -5.572e-01) * s1_0_1;
	r1 += M4(2.281e-01, -5.799e-03, -1.692e-02, -1.718e-02, 1.775e-01, -6.296e-03, 2.457e-02, -4.974e-02, -1.651e-02, 1.096e-01, 1.455e-01, 7.546e-02, 4.616e-01, 7.094e-02, -1.036e-01, -1.987e-01) * s1_0_1;
	r2 += M4(1.260e-01, -2.198e-01, -7.977e-02, -1.960e-01, 2.270e-02, -6.589e-02, -8.772e-02, 1.398e-01, 1.263e-01, -1.299e-02, 9.775e-02, -1.035e-01, -3.928e-01, -3.648e-01, -5.371e-02, 3.213e-01) * s1_0_1;
	r0 += M4(-8.674e-03, 5.930e-02, -2.588e-01, -8.964e-02, 5.985e-02, -1.346e-01, -2.590e-01, -5.305e-02, 2.096e-01, 6.908e-02, 3.512e-01, 1.033e-01, -5.490e-01, 1.130e-01, -1.255e-01, -6.899e-01) * s1_0_2;
	r1 += M4(2.469e-02, -4.604e-02, 2.719e-02, -1.676e-02, 5.822e-02, 3.656e-03, 7.934e-03, -1.031e-02, 3.030e-02, 2.231e-02, 2.838e-02, 1.336e-01, -2.731e-01, -7.548e-02, -2.603e-01, 1.251e-01) * s1_0_2;
	r2 += M4(8.246e-02, -1.352e-01, 1.650e-02, -1.878e-01, 4.067e-02, 2.263e-04, -5.820e-02, 1.782e-01, 2.298e-01, 6.714e-02, 7.398e-02, -3.065e-01, 1.675e-01, 1.219e-01, -3.244e-02, 3.725e-01) * s1_0_2;
	r0 += M4(-3.321e-02, -5.299e-02, -8.226e-02, 5.957e-01, -1.443e-01, -2.823e-02, 1.311e-01, 3.469e-01, 4.957e-01, -1.802e-01, 3.244e-01, -2.908e-01, -4.071e-01, 1.000e+00, 1.868e-01, -1.082e-01) * s1_1_0;
	r1 += M4(5.520e-01, -5.628e-02, 9.143e-02, 4.652e-02, 3.796e-01, 2.049e-01, -3.403e-01, -1.223e-01, -1.083e-01, 9.448e-02, -1.100e-01, -3.230e-02, -1.000e+00, 3.998e-01, -1.000e+00, 4.792e-01) * s1_1_0;
	r2 += M4(2.760e-02, -1.801e-01, -1.207e-01, -2.673e-01, -1.494e-01, -1.434e-01, -7.150e-03, 6.375e-01, 1.487e-01, -1.091e-01, 6.033e-02, -2.381e-01, -9.780e-01, 7.988e-01, 9.690e-02, 2.251e-01) * s1_1_0;
	r0 += M4(-1.000e+00, -5.054e-01, -1.000e+00, -4.739e-01, -6.256e-03, -4.472e-01, 5.050e-01, 2.552e-01, -3.546e-01, -6.682e-01, -1.571e-02, -2.499e-01, 5.019e-01, 1.000e+00, -7.610e-01, 7.285e-01) * s1_1_1;
	r1 += M4(-5.504e-02, -1.831e-01, 2.182e-01, 2.086e-02, -6.719e-01, -5.217e-02, 8.815e-02, -5.346e-02, -2.168e-01, 4.922e-02, -4.324e-02, 2.459e-01, -8.683e-02, 3.209e-02, 5.073e-01, -1.343e-01) * s1_1_1;
	r2 += M4(1.773e-01, -6.270e-01, -5.301e-02, -5.962e-01, 2.200e-01, 4.527e-01, -6.380e-02, 2.484e-01, 1.645e-01, -4.011e-01, 5.417e-02, -1.849e-01, 3.745e-01, 4.420e-01, 3.207e-01, -2.752e-01) * s1_1_1;
	r0 += M4(-1.772e-01, -2.171e-01, 8.856e-02, 1.109e-01, 4.088e-01, -1.883e-01, -2.374e-01, 5.105e-01, -1.945e-01, -3.336e-01, 4.962e-02, -6.079e-02, -5.845e-02, -4.809e-01, 2.984e-02, 1.502e-01) * s1_1_2;
	r1 += M4(-2.251e-01, -1.143e-01, -3.858e-02, 7.351e-02, 2.234e-01, 8.542e-02, 4.355e-01, -1.880e-01, -4.923e-02, -3.777e-02, -1.279e-01, 7.010e-02, 1.948e-01, 9.310e-02, -1.238e-02, 1.562e-01) * s1_1_2;
	r2 += M4(-4.890e-03, -1.437e-01, -5.508e-02, -3.661e-01, -1.971e-01, 1.711e-01, -2.971e-02, -1.439e-01, -1.039e-01, -2.929e-01, -4.323e-02, -7.649e-03, 3.304e-01, 4.247e-01, 9.283e-02, 4.741e-01) * s1_1_2;
	r0 += M4(-2.175e-01, 3.656e-01, 1.201e-01, 2.423e-01, -5.565e-02, -2.280e-01, -2.031e-01, -4.830e-01, -1.323e-01, -2.170e-01, -3.062e-01, 9.799e-02, 3.545e-01, 5.619e-02, 3.635e-01, -2.085e-01) * s1_2_0;
	r1 += M4(-5.818e-01, 1.686e-01, -6.098e-01, 3.465e-01, -6.985e-01, -7.302e-02, -1.000e+00, -5.214e-01, 3.429e-01, 9.698e-02, -4.934e-01, -7.806e-02, -3.101e-01, 5.702e-02, 2.030e-01, -1.548e-01) * s1_2_0;
	r2 += M4(-9.435e-01, 1.995e-01, 2.677e-01, -1.589e-01, -2.397e-01, -1.937e-02, 2.149e-01, 4.125e-01, 4.269e-03, -2.444e-01, -1.888e-01, -1.534e-02, 4.538e-01, -7.648e-02, 5.036e-02, -2.783e-01) * s1_2_0;
	r0 += M4(2.647e-01, 3.197e-02, 7.636e-01, 7.227e-02, -8.348e-01, -6.922e-01, 1.000e+00, -1.062e-01, 9.821e-02, 6.103e-01, -3.347e-01, -1.184e-02, 3.730e-02, -8.066e-01, -8.622e-03, -1.264e-01) * s1_2_1;
	r1 += M4(-1.613e-01, -8.015e-02, -2.730e-01, 4.603e-01, 2.435e-01, 1.498e-01, -4.422e-01, 6.252e-02, 3.993e-01, -1.375e-01, 3.213e-01, -1.274e-01, -4.183e-01, 1.597e-01, -1.959e-01, 8.026e-02) * s1_2_1;
	r2 += M4(1.099e-01, 5.060e-01, -4.076e-02, -1.293e-01, 7.989e-01, 1.565e-01, 1.192e-01, 4.463e-01, -3.491e-01, -3.623e-02, -2.106e-01, 3.041e-02, 1.153e-01, -3.342e-01, 3.723e-02, -3.312e-01) * s1_2_1;
	r0 += M4(6.760e-02, -1.261e-01, -1.360e-01, 2.842e-01, 4.048e-01, 5.991e-01, -2.608e-03, 1.723e-01, -5.098e-01, -1.336e-01, -3.389e-01, -3.921e-01, 2.127e-01, -1.000e+00, 2.726e-01, 3.838e-01) * s1_2_2;
	r1 += M4(-4.128e-02, 6.909e-02, -6.647e-02, -1.419e-02, 1.298e-01, -8.392e-03, 2.140e-01, -2.124e-02, -2.075e-01, -9.165e-02, 8.711e-02, -1.359e-01, 3.914e-01, 1.422e-02, 3.627e-02, 1.423e-01) * s1_2_2;
	r2 += M4(-1.840e-01, -1.916e-01, -4.044e-02, -1.654e-01, 2.596e-01, 1.952e-03, -7.277e-02, -4.300e-01, -3.602e-01, 1.363e-01, -1.082e-01, 3.378e-01, 2.627e-01, -2.744e-01, 3.413e-02, -1.881e-01) * s1_2_2;
	// Reload s0_* with the 3x3 neighbourhood from plane 2 and accumulate its
	// taps into the same accumulators.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	r0 += M4(-1.432e-01, 3.078e-01, 5.283e-02, 1.981e-02, 6.155e-01, -1.000e+00, 6.604e-01, -5.933e-01, 6.418e-01, -4.038e-01, 5.612e-01, -2.407e-01, 2.724e-01, -1.732e-01, 1.053e-01, -2.433e-01) * s0_0_0;
	r1 += M4(-9.992e-02, -8.062e-02, -7.088e-02, -3.721e-02, 2.384e-02, 3.591e-01, 2.577e-01, 1.549e-01, -2.320e-01, -3.406e-02, 3.043e-02, 5.951e-02, 1.561e-02, 5.972e-02, 2.794e-02, 3.511e-02) * s0_0_0;
	r2 += M4(-2.520e-01, -4.842e-02, -5.071e-02, 3.194e-01, 6.547e-01, -9.848e-01, -3.636e-01, -2.179e-01, 6.417e-02, 1.092e-01, -1.043e-03, -1.048e-02, 1.330e-01, 6.955e-02, 1.978e-01, -9.888e-02) * s0_0_0;
	r0 += M4(-2.236e-01, 2.418e-01, 1.234e-01, -4.369e-02, -7.207e-01, 7.360e-02, -4.756e-01, 3.453e-01, 2.798e-01, -9.032e-03, -1.943e-03, 1.302e-01, 2.519e-01, -3.381e-01, -4.118e-01, -4.756e-01) * s0_0_1;
	r1 += M4(7.987e-02, 4.228e-02, -1.145e-01, -1.346e-01, -2.212e-01, -1.075e-01, -7.992e-02, -1.409e-01, -3.491e-01, -1.045e-01, -2.251e-01, 1.658e-02, -7.848e-01, -3.410e-01, -1.133e-02, 1.577e-01) * s0_0_1;
	r2 += M4(-3.564e-01, 4.276e-01, 9.155e-02, 1.249e-01, 3.456e-01, 3.904e-01, -3.684e-02, 8.939e-02, -2.572e-01, 2.418e-01, 9.311e-02, 2.293e-01, -4.354e-02, 9.363e-02, -6.242e-02, -2.787e-01) * s0_0_1;
	r0 += M4(2.504e-01, 3.604e-01, 2.586e-02, 2.111e-01, 5.102e-01, -7.030e-02, -1.528e-01, 5.942e-02, 9.131e-02, -1.084e-02, 3.042e-01, 6.519e-03, 2.717e-01, -1.294e-01, 8.965e-01, 5.841e-01) * s0_0_2;
	r1 += M4(1.707e-01, 3.227e-02, -2.260e-03, -2.058e-02, -9.699e-02, 1.943e-02, -1.709e-01, 1.079e-01, -2.346e-01, 1.529e-02, -5.342e-02, -3.816e-02, 2.010e-01, 7.203e-02, 1.062e-02, 2.377e-02) * s0_0_2;
	r2 += M4(-4.225e-02, 4.627e-01, 2.740e-02, 2.390e-01, -1.646e-01, -1.984e-01, -8.960e-02, -2.616e-01, -2.675e-01, -3.756e-02, 2.246e-03, 1.219e-01, -2.647e-01, -2.825e-01, 6.041e-02, 9.384e-02) * s0_0_2;
	r0 += M4(9.415e-02, -1.462e-04, -1.117e-01, 3.501e-01, -2.541e-01, -1.147e-01, 1.463e-01, 8.212e-02, -8.108e-01, 2.979e-01, -5.939e-01, 2.457e-01, -4.098e-02, 4.232e-02, -3.170e-01, 3.451e-01) * s0_1_0;
	r1 += M4(1.090e-01, -1.607e-01, 3.761e-01, -9.847e-02, 3.380e-01, 8.274e-02, -6.154e-01, 1.929e-01, 3.167e-01, 1.933e-02, 8.584e-03, 2.625e-02, -3.847e-01, -2.040e-01, 2.443e-01, 1.834e-01) * s0_1_0;
	r2 += M4(-5.005e-02, 1.200e-01, 2.074e-01, 2.609e-02, 7.689e-02, 3.830e-01, 1.373e-01, 4.675e-01, 1.841e-01, 3.422e-01, 9.969e-02, -1.965e-01, -1.431e-01, 3.853e-01, -4.601e-02, -2.122e-01) * s0_1_0;
	r0 += M4(3.445e-01, -4.777e-01, 8.612e-01, -5.625e-01, 1.348e-01, -1.730e-01, 6.611e-01, 5.391e-01, -8.001e-01, 6.947e-01, -7.014e-01, 1.595e-01, -6.168e-01, -3.094e-02, -3.765e-01, 4.571e-01) * s0_1_1;
	r1 += M4(-1.890e-01, 9.220e-02, -1.344e-01, 2.765e-01, 5.242e-01, -3.061e-02, 9.760e-02, -9.203e-02, 4.429e-01, -1.426e-01, 1.098e-01, -3.173e-02, 6.183e-02, 3.566e-01, -5.504e-03, 3.066e-02) * s0_1_1;
	r2 += M4(5.686e-01, -2.601e-01, -1.891e-01, 3.053e-01, 1.939e-01, -2.516e-01, 1.697e-01, -1.259e-01, 6.289e-02, 6.582e-01, 2.028e-02, -2.388e-01, 3.458e-02, 5.460e-01, -1.023e-01, -1.390e-01) * s0_1_1;
	r0 += M4(-3.029e-01, -4.214e-01, -3.566e-01, -9.162e-02, 4.845e-02, 6.056e-01, 1.111e-01, -2.211e-01, 4.267e-01, 7.480e-02, 4.640e-01, 2.527e-01, 1.174e-01, 6.679e-02, -3.425e-01, -3.186e-01) * s0_1_2;
	r1 += M4(9.130e-02, 1.716e-01, -1.724e-01, -9.728e-02, -4.274e-01, 6.233e-02, -6.531e-02, -1.008e-01, 7.251e-02, 1.696e-01, -6.663e-02, -1.920e-01, -1.265e-01, -1.043e-01, -7.087e-02, -9.319e-02) * s0_1_2;
	r2 += M4(-1.747e-01, -8.007e-02, 2.075e-01, 2.212e-01, -4.929e-02, 4.310e-01, 2.277e-02, 1.101e-01, -4.365e-01, -3.508e-02, 1.724e-01, 2.562e-01, -1.696e-01, -6.621e-01, -1.677e-01, -4.799e-01) * s0_1_2;
	r0 += M4(-2.886e-02, 1.838e-01, 2.049e-01, 7.115e-02, -2.880e-03, 1.778e-01, -1.946e-01, 3.899e-02, 2.742e-01, -5.567e-01, 1.819e-01, -1.000e+00, -1.507e-01, 1.367e-01, -1.928e-02, 6.021e-02) * s0_2_0;
	r1 += M4(-1.585e-01, -4.186e-02, -9.742e-03, 1.363e-01, 7.490e-02, -4.099e-02, 8.522e-02, -6.765e-02, 8.903e-02, -9.842e-03, 6.208e-01, 9.149e-02, 3.436e-02, -1.675e-01, -1.585e-02, -2.334e-01) * s0_2_0;
	r2 += M4(-1.387e-01, 2.280e-01, 2.959e-02, -1.688e-01, -1.502e-01, -1.749e-02, -1.047e-01, 2.083e-01, 9.417e-01, -7.472e-01, -4.107e-01, 1.244e-01, -2.501e-02, -5.419e-02, 1.716e-01, 1.597e-01) * s0_2_0;
	r0 += M4(1.382e-01, -4.317e-01, -1.311e-02, 8.313e-02, -2.292e-01, 7.162e-01, -2.835e-01, 8.527e-02, 3.077e-01, -4.493e-01, -4.788e-01, 2.194e-01, -1.581e-01, 2.173e-01, 1.186e-01, 6.300e-01) * s0_2_1;
	r1 += M4(2.267e-01, 2.908e-02, 1.742e-02, -1.909e-01, 3.828e-01, -2.798e-02, -3.460e-02, -6.798e-02, 8.459e-02, -2.694e-02, 2.793e-01, 1.786e-01, 7.270e-01, -8.217e-02, 2.653e-02, -4.532e-02) * s0_2_1;
	r2 += M4(-1.520e-01, -1.300e-01, -4.290e-03, 1.684e-02, -1.475e-01, 1.119e-01, 2.520e-02, 2.402e-01, -2.087e-01, -4.138e-01, -1.070e-01, -5.648e-01, -3.025e-01, -3.417e-01, 3.776e-01, 1.090e-01) * s0_2_1;
	r0 += M4(-6.288e-02, 6.111e-01, -1.721e-01, 1.881e-01, -4.477e-02, -3.675e-02, -6.580e-02, 6.670e-02, -3.886e-01, 4.358e-01, 1.040e-01, -4.494e-01, -4.410e-02, 1.386e-01, 4.951e-02, -4.496e-01) * s0_2_2;
	r1 += M4(1.104e-02, 2.897e-02, -3.123e-02, 7.311e-02, 2.793e-02, 8.167e-03, -4.772e-02, -9.298e-03, -1.177e-01, -2.151e-03, -2.021e-01, -6.994e-02, -4.959e-01, 1.642e-02, 5.763e-03, 2.442e-04) * s0_2_2;
	r2 += M4(1.422e-01, 2.272e-01, 9.158e-02, 2.744e-01, -1.079e-01, 2.359e-02, -4.619e-02, 5.570e-02, -1.735e-01, -2.322e-01, 6.627e-02, 1.847e-01, 1.157e-02, 5.801e-01, -2.193e-02, -2.654e-02) * s0_2_2;
	// For each accumulator: add its constant offset, clamp to [0, 1], and
	// store it as a vec4 in output texels opos + (0..2, 0).
	r0 += V4(-1.999e-02, 1.836e-02, 3.515e-04, -3.747e-02);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-1.488e-02, -1.641e-02, 2.274e-02, 1.248e-02);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-8.843e-03, -2.921e-02, 4.954e-03, -4.043e-02);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_2x12_DS] -conv2
//!HOOK LUMA
//!COMPUTE 24 8 8 8
//!BIND conv1
//!BIND LUMA
//!SAVE conv2
//!WIDTH LUMA.w 3 *
//!HEIGHT LUMA.h
//!COMPONENTS 4
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Pass "conv2": reads the texture saved as "conv1" and saves a 4-component
// texture named "conv2" that is three times as wide as LUMA and equally tall.
// The WHEN expression above is written in reverse Polish notation: the pass
// runs only when OUTPUT exceeds 1.2x the LUMA size in both width and height.

// Select the scalar/vector/matrix types used by this pass: 16-bit float types
// when the explicit-arithmetic-types extension is defined, otherwise the
// regular float types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): clamp pos + (x, y) to [0, sz], multiply the x coordinate by
// three, and fetch texel 0, 1 or 2 of that horizontal triple from "conv1",
// scaled by conv1_mul and converted to V4. Expect `pos` and `sz` to be in
// scope where the macros are expanded.
#define l0(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv1_mul * texelFetch(conv1_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Workgroup-shared tile of input samples: three planes of 10x10 V4 entries,
// one plane per macro above.
shared V4 G[3][10][10];
// conv2 pass entry point.
// Loads the three conv1 planes into the shared tile G, accumulates a 3x3 neighbourhood of
// 4x4 weight matrices per plane into r0/r1/r2, adds a bias, clamps to [0, 1] and stores the
// three results side by side in out_image.
void hook() {
	// xy: invocation index inside the work group; pos: position used by l0/l1/l2.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// Each position owns three horizontally adjacent output texels.
	ivec2 opos = pos * ivec2(3, 1);
	// Largest valid LUMA coordinate; used as the clamp bound inside l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative tile fill: each invocation writes the entries at xy + {0, 8} that are
	// below 10. The (x - 1, y - 1) offset shifts the tile by one texel up/left, so the
	// tile holds the work group's positions plus a one-texel border.
	// NOTE(review): full coverage of the 10x10 tile assumes xy spans 0..7 in both axes,
	// as the COMPUTE directive's thread size suggests — confirm.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// All tile writes must be finished before any invocation reads its neighbourhood.
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0, r1, r2;
	r0 = V4(0.0); r1 = V4(0.0); r2 = V4(0.0);
	// sP_R_C holds the tile value of plane P at row offset R, column offset C (3x3 window).
	// s0_* is loaded from plane 0 and s1_* from plane 1.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 contributions: one 4x4 matrix per window position and per output vector.
	r0 += M4(-2.080e-02, -5.356e-02, -1.383e-01, -1.708e-03, -3.442e-02, -4.854e-01, -1.337e-01, -1.764e-01, 7.231e-02, -2.103e-02, 2.600e-03, -4.344e-02, -1.174e-01, 2.188e-01, 1.766e-01, 9.735e-02) * s0_0_0;
	r1 += M4(3.691e-02, 1.203e-01, -4.717e-02, 6.283e-03, -2.653e-01, 9.106e-02, -1.793e-01, -7.584e-02, -3.043e-02, -1.025e-01, 1.256e-03, 2.373e-02, -1.929e-01, 3.068e-02, 1.037e-01, 6.624e-02) * s0_0_0;
	r2 += M4(-8.212e-01, 4.691e-02, -6.189e-02, -5.538e-02, 5.251e-02, -4.437e-01, -2.771e-02, -2.592e-01, -3.282e-01, 1.927e-02, 3.828e-02, 6.193e-02, 9.936e-02, -1.194e-01, -5.420e-02, -2.036e-01) * s0_0_0;
	r0 += M4(-5.244e-02, -6.956e-02, -1.363e-01, -8.933e-02, -1.023e-01, -5.207e-02, -1.238e-01, -5.810e-02, -6.051e-03, 2.375e-02, 5.772e-02, 8.976e-02, -9.738e-02, -1.655e-01, 2.028e-02, 7.945e-02) * s0_0_1;
	r1 += M4(-5.094e-03, 1.717e-01, 1.206e-01, 3.894e-02, -8.090e-01, 3.718e-02, -1.404e-01, -6.089e-02, -2.560e-01, -1.385e-01, -6.060e-02, 4.470e-02, 4.241e-01, 7.546e-02, -8.667e-02, 1.012e-02) * s0_0_1;
	r2 += M4(-4.256e-01, -2.133e-01, 4.274e-02, -1.749e-01, 1.403e-01, -1.577e-01, -4.817e-02, -5.383e-01, -7.510e-01, 4.915e-02, -1.207e-02, -3.889e-02, 5.032e-01, 4.738e-02, -3.283e-02, 8.887e-02) * s0_0_1;
	r0 += M4(-5.989e-02, -5.946e-03, 8.880e-03, 5.110e-02, 2.557e-02, -8.348e-02, -8.686e-02, -9.928e-02, 3.353e-02, 1.836e-02, 6.231e-03, -4.310e-02, -6.506e-02, 2.303e-02, -2.886e-02, 7.266e-02) * s0_0_2;
	r1 += M4(-1.970e-01, 4.006e-02, -5.783e-02, 8.680e-02, 2.444e-01, 4.419e-02, -2.171e-02, -8.366e-02, 1.509e-01, -9.072e-02, 6.213e-02, -2.677e-02, -9.917e-03, 3.160e-02, -1.252e-02, -8.988e-03) * s0_0_2;
	r2 += M4(1.380e-02, 1.125e-02, 9.682e-02, -9.549e-02, 1.487e-01, 5.800e-02, -6.476e-02, 1.066e-01, -2.721e-01, -5.075e-02, -1.272e-02, -1.828e-02, 1.441e-02, -5.657e-02, -3.927e-02, -5.261e-02) * s0_0_2;
	r0 += M4(-1.470e-01, -3.631e-01, -3.733e-01, -5.053e-02, -7.716e-02, -3.852e-01, -3.389e-01, -2.042e-01, -3.034e-01, -2.277e-01, -5.565e-02, -5.459e-02, -8.243e-02, 1.364e-01, 1.180e-01, 5.198e-02) * s0_1_0;
	r1 += M4(-1.382e-01, 2.388e-01, -2.108e-01, -8.502e-02, -2.221e-01, -1.755e-02, -1.258e-02, -5.254e-01, -6.578e-02, -1.392e-01, -1.792e-01, -3.396e-03, -2.163e-01, -2.270e-02, -6.064e-02, -6.897e-03) * s0_1_0;
	r2 += M4(-1.065e-01, -1.579e-01, 2.770e-01, -1.935e-01, -9.411e-02, -3.879e-01, -1.024e-01, -3.390e-01, -5.010e-02, -2.121e-01, 8.081e-02, -4.743e-02, -5.355e-02, -6.896e-02, 4.213e-02, -2.187e-01) * s0_1_0;
	r0 += M4(-9.638e-02, 2.269e-01, -9.056e-01, 7.182e-02, 1.460e-01, 2.159e-01, 2.904e-02, -3.951e-01, -2.932e-01, -2.534e-01, -2.391e-01, -2.099e-01, -3.953e-02, -2.663e-01, -1.266e-01, 1.899e-01) * s0_1_1;
	r1 += M4(-4.174e-01, 7.686e-01, 1.292e-01, -4.839e-02, -2.067e-01, 2.409e-01, -6.192e-02, -3.435e-01, -3.037e-01, -3.650e-01, -4.252e-01, 2.556e-02, 4.214e-01, 1.014e-01, 4.005e-01, 1.596e-01) * s0_1_1;
	r2 += M4(-1.691e-01, 2.835e-02, 2.556e-03, -1.996e-01, -1.317e-01, 6.518e-02, -8.604e-02, -1.915e-01, -2.006e-01, -1.540e-01, 7.277e-02, -4.456e-01, -5.410e-01, -2.339e-01, 3.213e-01, 3.133e-01) * s0_1_1;
	r0 += M4(-1.057e-01, -5.788e-02, -8.419e-02, -1.614e-01, 2.782e-02, -9.091e-03, -5.079e-03, -4.821e-02, 2.065e-02, 1.145e-01, 1.451e-01, 7.885e-02, -5.799e-02, 9.144e-02, -2.082e-02, -1.712e-01) * s0_1_2;
	r1 += M4(-5.488e-02, 2.017e-01, 5.914e-02, -4.969e-01, 9.094e-02, 1.431e-01, 3.055e-03, -2.064e-01, 1.114e-01, -1.919e-01, -8.279e-02, 1.884e-01, -5.261e-01, 9.872e-02, 1.421e-02, -2.755e-02) * s0_1_2;
	r2 += M4(6.626e-03, -4.781e-02, 8.959e-02, 9.111e-03, -6.038e-02, -1.792e-02, -7.005e-02, 1.148e-01, -1.412e-01, -2.815e-02, 7.665e-02, 5.955e-02, -6.509e-02, -8.722e-02, 8.504e-02, -1.692e-01) * s0_1_2;
	r0 += M4(-1.057e-01, -3.139e-02, -1.401e-01, -1.518e-01, 3.383e-02, -1.734e-01, -4.875e-02, 1.429e-01, -4.680e-02, -4.904e-02, -8.106e-02, -4.165e-02, -5.239e-02, -6.003e-02, -1.110e-01, -9.090e-03) * s0_2_0;
	r1 += M4(-2.185e-02, 1.465e-01, -1.893e-01, -1.129e-01, -6.873e-03, 1.453e-01, -1.006e-01, 4.392e-02, -4.278e-02, -1.106e-01, -4.156e-02, -1.985e-02, -1.199e-02, 9.172e-02, -6.171e-02, 3.558e-02) * s0_2_0;
	r2 += M4(-6.886e-02, -1.510e-01, -3.199e-01, -1.714e-01, 9.170e-02, 1.465e-01, -3.540e-01, -7.276e-03, 3.257e-02, -4.608e-02, -5.657e-02, -1.128e-01, 5.191e-02, 3.809e-02, 7.541e-02, -1.940e-02) * s0_2_0;
	r0 += M4(-1.313e-01, 4.875e-02, -2.037e-01, -1.341e-01, -6.406e-03, 4.824e-02, 1.548e-01, 1.226e-02, -1.568e-01, -9.395e-02, 1.495e-02, -2.082e-01, -1.382e-01, 8.494e-02, 1.652e-01, -7.042e-02) * s0_2_1;
	r1 += M4(-7.364e-02, 2.901e-01, -1.328e-01, -4.449e-03, -4.278e-02, 1.557e-01, -1.102e-01, 1.568e-02, -1.923e-01, -1.470e-01, -1.657e-01, -3.853e-02, 3.210e-02, 4.969e-02, -1.944e-02, 3.478e-02) * s0_2_1;
	r2 += M4(8.360e-03, -7.321e-02, -1.977e-01, -1.998e-01, 8.678e-02, -8.980e-02, -5.323e-02, 1.230e-02, 6.546e-02, -1.969e-01, -4.486e-01, -3.692e-01, 1.411e-01, -9.562e-02, 3.715e-03, 1.101e-01) * s0_2_1;
	r0 += M4(-9.556e-02, -1.019e-01, -1.537e-01, 1.222e-03, 1.287e-02, -2.915e-02, 1.823e-02, 7.225e-03, 7.938e-02, 1.210e-01, 1.135e-01, -8.763e-02, -1.479e-01, -8.179e-02, 4.094e-02, 1.940e-01) * s0_2_2;
	r1 += M4(-6.383e-02, 7.707e-02, -2.365e-02, 1.502e-01, -2.250e-02, 5.705e-02, 1.105e-02, 5.524e-02, 1.284e-01, -5.440e-02, -5.479e-02, -1.628e-01, -1.386e-02, 2.445e-02, 6.165e-02, 2.647e-01) * s0_2_2;
	r2 += M4(1.028e-02, -9.623e-03, -2.762e-01, 7.163e-04, 5.092e-02, -2.853e-02, 3.333e-02, 3.943e-03, 3.855e-02, 4.016e-02, 7.572e-03, 1.010e-01, 1.133e-01, -5.442e-02, -1.175e-01, -1.821e-01) * s0_2_2;
	// Plane 1 contributions.
	r0 += M4(2.119e-02, -1.354e-01, -1.310e-01, -1.070e-01, -7.604e-02, -8.810e-02, 1.815e-03, -3.262e-02, 2.507e-02, 3.904e-02, 9.569e-02, -3.983e-02, -3.413e-02, -2.700e-01, 1.103e-01, -1.145e-02) * s1_0_0;
	r1 += M4(2.351e-01, -6.465e-02, -4.582e-02, -1.085e-01, 1.106e-01, -4.765e-03, 3.042e-02, 2.252e-02, 3.288e-02, 1.582e-02, 9.864e-02, 3.347e-02, 1.659e-02, 1.477e-01, -1.495e-02, -1.450e-02) * s1_0_0;
	r2 += M4(-2.000e-03, 1.015e-01, 7.636e-02, 1.963e-01, 2.340e-02, 4.944e-02, 2.527e-02, 1.113e-01, -1.636e-01, -5.722e-02, 9.773e-02, 1.172e-02, -2.180e-01, -1.902e-01, -2.321e-01, -1.652e-01) * s1_0_0;
	r0 += M4(-1.097e-01, -9.268e-02, -1.576e-01, -4.244e-02, 7.321e-04, -5.421e-02, 1.946e-01, 1.255e-01, 5.945e-02, 1.078e-01, -1.180e-02, 1.062e-02, -1.674e-01, 9.109e-02, 2.078e-01, 1.607e-01) * s1_0_1;
	r1 += M4(-7.958e-01, -1.194e-02, 7.324e-02, 3.425e-03, -1.802e-01, 8.583e-02, -3.159e-01, 1.283e-01, 1.104e-01, -9.402e-02, 1.593e-01, 1.136e-02, -1.440e-01, 1.297e-01, -1.320e-01, -6.227e-04) * s1_0_1;
	r2 += M4(-5.130e-01, -2.124e-01, 5.024e-02, -3.938e-01, -2.751e-01, 7.603e-02, -3.081e-02, -4.354e-02, -2.997e-01, 4.292e-02, 1.033e-01, 1.182e-01, -4.117e-02, 3.698e-02, -5.208e-02, -6.919e-03) * s1_0_1;
	r0 += M4(-3.796e-02, -9.739e-02, -1.227e-02, -5.267e-02, 5.090e-02, 1.655e-01, 1.393e-02, 2.265e-02, 1.018e-01, 2.877e-02, -2.384e-02, -5.062e-02, 1.694e-02, -6.383e-02, -9.788e-02, -1.667e-01) * s1_0_2;
	r1 += M4(-1.651e-01, 5.561e-03, -9.756e-02, 3.599e-02, 1.606e-01, -9.789e-03, 8.890e-02, -1.350e-01, 5.677e-02, -5.528e-02, 1.519e-01, -1.005e-01, 2.631e-02, 4.435e-02, 4.944e-02, -1.498e-01) * s1_0_2;
	r2 += M4(-1.007e-01, -9.541e-03, 4.974e-02, -1.177e-01, 1.151e-01, -1.194e-02, -9.541e-02, 1.309e-01, 5.017e-02, -2.358e-02, 8.345e-02, 9.835e-02, -1.498e-01, -4.197e-02, 5.502e-02, -4.377e-02) * s1_0_2;
	r0 += M4(1.431e-02, -3.933e-02, 1.045e-02, 8.273e-02, 6.705e-02, 4.547e-01, -9.617e-02, 6.401e-02, 2.128e-02, 2.065e-01, -4.841e-02, 4.181e-02, -2.789e-01, -1.639e-01, -2.523e-01, -1.723e-02) * s1_1_0;
	r1 += M4(1.241e-01, 2.605e-03, 8.392e-02, 7.266e-02, 2.071e-02, 7.300e-02, 2.025e-01, -6.000e-02, 5.595e-02, -1.239e-01, 5.442e-02, -5.428e-02, 3.225e-02, 1.174e-01, 3.210e-02, -2.281e-02) * s1_1_0;
	r2 += M4(1.116e-01, -9.186e-02, -1.158e-01, 1.294e-01, -1.772e-01, -6.324e-02, 5.255e-02, -1.063e-02, 4.100e-02, 1.263e-01, 1.030e-02, -4.985e-02, 7.303e-02, 1.019e-01, 2.611e-01, 1.079e-02) * s1_1_0;
	r0 += M4(-7.805e-02, -2.986e-01, -3.921e-01, -7.285e-01, 2.037e-01, -3.155e-01, -9.210e-02, -4.248e-01, -1.707e-01, 1.186e-01, -4.076e-01, -1.804e-01, 9.787e-02, 9.450e-02, 5.137e-01, -1.169e-01) * s1_1_1;
	r1 += M4(-2.388e-01, -2.389e-02, -5.020e-01, -7.066e-01, 2.133e-01, 3.926e-01, 2.395e-01, -4.619e-01, -3.370e-02, -6.792e-01, 2.796e-01, -8.134e-02, -2.429e-01, -5.762e-01, -1.433e-03, 3.612e-01) * s1_1_1;
	r2 += M4(2.923e-01, -2.122e-01, -4.211e-01, -3.737e-01, -7.221e-02, -2.634e-01, 1.584e-01, 2.695e-02, 2.763e-02, -2.437e-01, -1.379e-02, 9.117e-02, 2.345e-01, -6.217e-02, -2.410e-01, 1.070e-01) * s1_1_1;
	r0 += M4(-9.434e-02, -1.988e-01, -2.301e-01, 1.133e-02, 4.166e-01, 4.930e-01, 5.676e-01, 1.889e-01, 7.355e-02, 8.438e-02, -2.564e-02, 1.097e-01, -7.598e-02, -9.038e-02, -4.925e-03, 1.849e-01) * s1_1_2;
	r1 += M4(-5.144e-01, -1.202e-01, -7.289e-02, -3.065e-01, 3.232e-01, -5.691e-02, -6.277e-02, 3.053e-01, 2.550e-01, 3.331e-02, 6.623e-02, -7.236e-02, -6.069e-02, 7.730e-02, -4.973e-02, 3.857e-01) * s1_1_2;
	r2 += M4(1.929e-01, -7.343e-02, -1.308e-01, -3.611e-01, 1.169e-01, 2.236e-01, -2.708e-02, 1.702e-01, -1.772e-04, 1.677e-01, -1.992e-01, 1.604e-01, 8.662e-02, -2.356e-02, -3.106e-02, -8.959e-02) * s1_1_2;
	r0 += M4(5.206e-02, 4.624e-02, 1.665e-01, -4.407e-02, -8.684e-05, 1.923e-02, 2.045e-01, -1.941e-02, -2.122e-02, -4.677e-02, 2.279e-01, -2.858e-02, 2.550e-02, -5.190e-03, -8.718e-02, -6.070e-03) * s1_2_0;
	r1 += M4(7.489e-02, -1.024e-02, 7.919e-02, -9.103e-02, 7.141e-02, -4.223e-02, -1.504e-02, 6.236e-02, 1.240e-02, -5.816e-02, -2.620e-02, 3.209e-02, 2.637e-02, 1.870e-01, 6.496e-02, -9.202e-02) * s1_2_0;
	r2 += M4(-6.519e-02, -1.431e-02, -1.743e-01, 6.708e-02, 1.294e-01, 4.548e-02, -9.034e-02, 1.457e-01, 1.845e-02, -9.884e-02, -2.183e-01, 3.544e-02, -1.178e-01, -4.794e-03, -1.455e-01, -3.338e-02) * s1_2_0;
	r0 += M4(2.375e-02, -9.611e-02, -1.253e-01, 2.798e-01, 1.401e-01, -5.845e-02, 5.834e-02, 1.744e-01, 6.088e-02, -3.867e-03, -1.334e-01, 8.369e-02, 7.199e-02, -1.641e-02, -1.024e-01, 1.750e-01) * s1_2_1;
	r1 += M4(-1.044e-02, 1.865e-02, 1.006e-01, 2.515e-01, 8.673e-03, -5.530e-01, 7.865e-02, 2.173e-01, 3.569e-02, -2.054e-01, 1.392e-01, -1.864e-03, 6.382e-02, 1.098e-01, 1.515e-01, -5.136e-02) * s1_2_1;
	r2 += M4(-5.565e-02, 8.627e-02, -4.216e-01, -3.444e-02, 1.799e-01, 1.447e-01, -3.919e-01, 1.994e-01, -1.993e-02, 9.584e-02, -4.752e-01, 2.175e-02, 1.234e-02, 1.010e-01, 3.689e-01, 8.478e-02) * s1_2_1;
	r0 += M4(1.009e-01, -8.635e-03, -5.502e-02, -7.514e-02, 6.806e-02, 1.693e-01, 3.753e-01, -8.348e-02, -6.023e-02, 7.168e-03, 6.619e-02, -1.001e-01, -3.603e-02, -7.107e-02, -1.182e-01, -9.790e-02) * s1_2_2;
	r1 += M4(-1.009e-02, -4.825e-02, 4.610e-02, -4.803e-01, 4.476e-03, -2.085e-01, -6.277e-02, -1.811e-01, -5.640e-03, -4.151e-02, 1.262e-02, -7.816e-02, -1.127e-01, 7.523e-02, -2.966e-02, -1.507e-01) * s1_2_2;
	r2 += M4(-5.521e-02, 7.396e-02, -3.820e-01, 1.293e-02, -5.109e-02, -1.883e-03, -3.452e-02, -5.744e-02, -2.505e-02, -1.059e-01, -7.582e-02, 8.270e-02, -9.538e-02, -4.504e-02, 2.545e-01, -1.212e-01) * s1_2_2;
	// The s0_* variables are reused here to hold the 3x3 window of plane 2.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 contributions.
	r0 += M4(1.672e-02, 1.233e-01, -4.723e-02, 5.029e-02, 5.419e-04, 1.143e-01, 4.531e-02, 3.753e-02, -5.800e-02, -1.007e-01, -1.221e-01, -2.120e-02, 4.676e-02, 2.771e-02, -4.149e-03, 3.184e-02) * s0_0_0;
	r1 += M4(-2.461e-02, -1.719e-02, 2.923e-02, 5.161e-02, -1.886e-02, -2.867e-03, -1.949e-03, 6.423e-02, -1.204e-01, -1.911e-01, 2.528e-02, -1.759e-02, -2.934e-02, -9.887e-03, -1.621e-02, 9.352e-03) * s0_0_0;
	r2 += M4(2.171e-02, 2.696e-02, 1.052e-01, 2.714e-02, 1.726e-01, 1.692e-02, 1.093e-01, 9.445e-02, 2.958e-01, 5.126e-02, 1.257e-02, 2.939e-02, -2.282e-01, -3.963e-02, 2.227e-02, -8.447e-02) * s0_0_0;
	r0 += M4(1.119e-03, -2.109e-01, -1.739e-01, -5.335e-02, 2.669e-01, 9.139e-02, -1.839e-01, -1.113e-01, -4.960e-02, -1.315e-03, -1.151e-01, -2.757e-02, -6.461e-02, -5.778e-02, -1.059e-01, -1.857e-02) * s0_0_1;
	r1 += M4(8.742e-02, 4.236e-02, -4.113e-03, 5.042e-02, 3.722e-01, 4.042e-02, 1.317e-01, -6.227e-02, -3.999e-02, 4.098e-02, 4.682e-02, 1.235e-01, -8.830e-02, -3.545e-02, -4.534e-02, -6.953e-02) * s0_0_1;
	r2 += M4(-4.874e-01, 1.383e-01, 1.293e-01, 4.608e-02, 1.255e-01, 1.173e-02, 1.042e-01, 1.198e-01, 5.762e-01, -1.882e-01, 1.700e-01, -2.162e-01, -3.210e-01, 4.535e-02, 2.715e-02, -3.477e-02) * s0_0_1;
	r0 += M4(-4.299e-02, -2.680e-02, -2.612e-02, 4.805e-02, 9.360e-02, -1.389e-03, -7.892e-02, -8.273e-02, -1.542e-01, -2.076e-01, -6.823e-02, -1.702e-01, -2.919e-02, 4.991e-02, 2.970e-02, 6.091e-02) * s0_0_2;
	r1 += M4(-2.803e-01, 6.227e-02, -1.236e-02, 8.519e-02, -2.446e-01, 4.926e-03, -8.460e-02, -2.068e-01, -2.529e-01, 3.219e-02, -1.026e-01, 1.723e-01, 4.101e-02, -2.998e-02, -1.199e-01, 9.105e-02) * s0_0_2;
	r2 += M4(-6.216e-02, -4.872e-02, 5.312e-02, -1.160e-01, 1.417e-02, 1.076e-01, 7.354e-02, 1.686e-01, 4.337e-02, -4.967e-02, 6.709e-02, -3.443e-02, 6.688e-02, -4.721e-03, -2.351e-02, -2.817e-02) * s0_0_2;
	r0 += M4(2.378e-02, 3.072e-01, 1.073e-01, -8.590e-02, 2.690e-01, 2.155e-01, 1.870e-01, -1.525e-02, -1.727e-02, -2.725e-01, 1.596e-01, -2.090e-02, -1.503e-01, -2.388e-01, -2.920e-01, -9.180e-02) * s0_1_0;
	r1 += M4(8.601e-02, 1.780e-02, 9.701e-02, -5.086e-02, -1.488e-01, 1.078e-01, -5.391e-02, 3.406e-02, -1.724e-01, -1.459e-01, -2.671e-01, 5.002e-02, 1.926e-01, -5.558e-02, -2.086e-02, -2.927e-02) * s0_1_0;
	r2 += M4(-6.800e-02, 6.717e-03, -7.897e-02, 1.496e-01, 6.401e-02, 1.327e-01, -8.128e-02, -1.066e-01, -1.681e-01, -1.411e-01, 2.025e-02, -3.512e-02, 1.716e-01, 8.313e-02, -1.636e-01, 1.271e-01) * s0_1_0;
	r0 += M4(-5.875e-01, -5.467e-01, -5.819e-01, -1.469e-01, -1.556e-01, -1.091e-01, 2.705e-01, 1.841e-01, 5.151e-01, 1.115e-01, -2.153e-01, 4.249e-01, -4.864e-02, 1.699e-01, -1.254e-01, -2.649e-02) * s0_1_1;
	r1 += M4(-4.479e-02, 3.610e-01, -2.761e-01, -2.318e-01, 6.106e-01, 3.646e-03, 1.657e-01, 1.490e-01, 6.540e-02, -2.873e-01, 2.872e-02, 3.174e-02, -5.891e-01, 5.397e-03, -1.784e-01, 6.928e-03) * s0_1_1;
	r2 += M4(-1.686e-02, -4.183e-01, 2.067e-02, -3.621e-01, 2.728e-01, 1.270e-01, -4.017e-01, 6.387e-01, -2.066e-01, 5.797e-01, -6.413e-01, -2.934e-01, -1.748e-01, -2.562e-01, -4.253e-02, -3.270e-01) * s0_1_1;
	r0 += M4(-7.913e-02, 1.432e-02, -8.497e-02, -2.173e-01, 3.937e-02, 1.381e-01, 1.367e-01, 1.561e-01, -1.124e-01, -2.609e-01, -4.885e-01, 2.913e-01, -2.173e-02, -1.628e-01, -2.285e-01, -1.909e-01) * s0_1_2;
	r1 += M4(2.748e-02, -1.720e-02, -5.707e-02, -3.348e-01, -9.198e-02, -3.978e-02, -1.150e-01, 2.892e-01, -2.344e-01, 4.577e-02, 1.272e-01, 8.226e-03, 1.103e-01, 7.223e-02, 4.683e-02, -2.274e-01) * s0_1_2;
	r2 += M4(-3.800e-02, -3.236e-02, -1.132e-01, -1.217e-02, 9.735e-02, 3.908e-02, -9.147e-02, -1.061e-01, -9.810e-02, 5.407e-02, -9.104e-02, -6.078e-02, -2.485e-01, 5.415e-02, 2.639e-01, 5.796e-02) * s0_1_2;
	r0 += M4(-4.095e-02, 1.082e-01, -1.926e-02, -6.175e-02, 1.785e-01, 2.163e-01, 8.416e-02, -1.217e-02, 5.246e-02, 1.332e-02, -9.596e-02, -1.474e-02, -3.800e-02, -3.232e-01, -3.361e-01, -1.418e-02) * s0_2_0;
	r1 += M4(2.654e-02, 1.360e-01, 7.814e-02, -2.840e-02, 6.216e-02, 3.267e-02, 5.340e-03, 8.871e-02, -6.325e-02, 7.997e-02, -3.611e-04, -3.686e-02, -1.029e-02, 1.354e-02, 6.239e-02, -8.941e-03) * s0_2_0;
	r2 += M4(7.435e-03, -1.145e-01, 1.345e-01, 7.593e-02, -8.300e-02, 8.618e-02, 4.549e-01, 1.161e-01, -4.805e-02, 1.005e-02, 1.489e-01, -1.048e-01, 6.054e-02, -8.956e-02, -1.547e-01, 1.212e-01) * s0_2_0;
	r0 += M4(-6.714e-02, -6.557e-02, -1.043e-01, -1.284e-01, -2.860e-02, -1.718e-01, -1.488e-01, 7.761e-03, -1.255e-01, 2.517e-02, 9.390e-02, -1.095e-01, 1.301e-01, 2.056e-01, 1.590e-01, -2.336e-02) * s0_2_1;
	r1 += M4(-4.111e-02, 1.881e-01, 2.580e-02, -1.163e-01, 7.054e-02, -2.123e-02, 1.035e-03, 1.175e-01, 6.771e-02, 1.147e-01, -8.833e-02, 2.555e-02, -7.652e-03, 9.356e-02, -1.838e-01, -1.655e-01) * s0_2_1;
	r2 += M4(-4.007e-03, -1.693e-02, -2.866e-01, -5.989e-02, -8.501e-04, -1.524e-02, 6.466e-01, 5.537e-02, 2.299e-02, -1.013e-01, 4.700e-01, 2.150e-02, 6.689e-02, 2.212e-01, -3.193e-01, -2.038e-01) * s0_2_1;
	r0 += M4(3.501e-02, 2.243e-03, 1.818e-02, -3.258e-02, 5.958e-02, 7.079e-02, 9.587e-02, -3.387e-02, -2.359e-02, -6.741e-02, -9.760e-02, -8.728e-02, -1.773e-01, -1.227e-01, -1.047e-01, 7.031e-02) * s0_2_2;
	r1 += M4(4.004e-02, 5.875e-02, -5.903e-03, -2.232e-05, 2.631e-02, -4.737e-02, -7.404e-03, -2.247e-01, -7.112e-02, 7.432e-02, -2.405e-02, 2.908e-02, -8.126e-02, 3.770e-02, -2.645e-02, 1.323e-01) * s0_2_2;
	r2 += M4(1.843e-02, 6.568e-02, -5.222e-02, 1.293e-02, -7.568e-02, 7.785e-02, 1.432e-01, 2.949e-02, -6.884e-03, -4.184e-02, 6.521e-02, -3.865e-02, 1.080e-01, -2.054e-01, -3.042e-01, 1.441e-01) * s0_2_2;
	// Add the per-output bias, clamp to [0, 1], and store the three result vectors in the
	// three adjacent output texels owned by this position.
	r0 += V4(-1.251e-02, -9.490e-03, 1.249e-02, -9.247e-03);
	r0 = clamp(r0, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0));
	r1 += V4(-6.161e-03, 2.679e-02, -2.325e-02, -8.886e-03);
	r1 = clamp(r1, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r1));
	r2 += V4(-1.745e-02, -9.552e-03, -9.257e-03, 6.033e-04);
	r2 = clamp(r2, V4(0.0), V4(1.0));
	imageStore(out_image, opos + ivec2(2, 0), vec4(r2));
}

//!DESC [CuNNy_2x12_DS] -out-shuffle
//!HOOK LUMA
//!COMPUTE 16 16 8 8
//!BIND conv2
//!BIND LUMA
//!WIDTH LUMA.w 2 *
//!HEIGHT LUMA.h 2 *
//!COMPONENTS 1
//!WHEN OUTPUT.w LUMA.w 1.200 * > OUTPUT.h LUMA.h 1.200 * > *
// Use 16-bit float vector/matrix/scalar types when the extension macro is defined;
// otherwise V4/M4/F fall back to the regular 32-bit types.
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : enable
#ifdef GL_EXT_shader_explicit_arithmetic_types_float16
#	define V4 f16vec4
#	define M4 f16mat4
#	define F float16_t
#else
#	define V4 vec4
#	define M4 mat4
#	define F float
#endif
// l0/l1/l2(x, y): fetch texel 0/1/2 of the three horizontally adjacent conv2 texels
// that belong to position pos + (x, y). The position is clamped to [0, sz] first, then
// its x coordinate is scaled by 3. The fetched value is multiplied by conv2_mul.
// These macros expect `pos` and `sz` to be defined where they are expanded.
#define l0(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(0, 0), 0)))
#define l1(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(1, 0), 0)))
#define l2(x, y) V4((conv2_mul * texelFetch(conv2_raw, clamp(pos + ivec2(x, y), ivec2(0), sz) * ivec2(3, 1) + ivec2(2, 0), 0)))
// Work-group shared tile: 3 planes (one per l0/l1/l2) of 10x10 V4 values.
shared V4 G[3][10][10];
// out-shuffle pass entry point.
// Loads the three conv2 planes into the shared tile G, accumulates a 3x3 neighbourhood of
// 4x4 weight matrices per plane into a single vector r0, then writes its four components
// to a 2x2 block of out_image, each added to a LUMA sample taken at that output pixel.
void hook() {
	// xy: invocation index inside the work group; pos: position used by l0/l1/l2.
	ivec2 xy = ivec2(gl_LocalInvocationID.xy);
	ivec2 pos = ivec2(gl_WorkGroupID.xy) * ivec2(8, 8) + xy;
	// Each position owns a 2x2 block of output pixels starting at opos.
	ivec2 opos = pos * ivec2(2, 2);
	// Largest valid LUMA coordinate; used as the clamp bound inside l0/l1/l2.
	ivec2 sz = ivec2(LUMA_size) - ivec2(1);
	// Cooperative tile fill: each invocation writes the entries at xy + {0, 8} that are
	// below 10. The (x - 1, y - 1) offset shifts the tile by one texel up/left, so the
	// tile holds the work group's positions plus a one-texel border.
	// NOTE(review): full coverage of the 10x10 tile assumes xy spans 0..7 in both axes,
	// as the COMPUTE directive's thread size suggests — confirm.
	for (int y = 0; y < 10; y += 8) {
		int ay = xy.y + y;
		if (ay >= 10) break;
		for (int x = 0; x < 10; x += 8) {
			int ax = xy.x + x;
			if (ax >= 10) break;
			G[0][ay][ax] = l0(x - 1, y - 1);
			G[1][ay][ax] = l1(x - 1, y - 1);
			G[2][ay][ax] = l2(x - 1, y - 1);
		}
	}
	// All tile writes must be finished before any invocation reads its neighbourhood.
	barrier();
	V4 s0_0_0, s0_0_1, s0_0_2, s0_1_0, s0_1_1, s0_1_2, s0_2_0, s0_2_1, s0_2_2, s1_0_0, s1_0_1, s1_0_2, s1_1_0, s1_1_1, s1_1_2, s1_2_0, s1_2_1, s1_2_2;
	V4 r0;
	r0 = V4(0.0);
	// sP_R_C holds the tile value of plane P at row offset R, column offset C (3x3 window).
	// s0_* is loaded from plane 0 and s1_* from plane 1.
	s0_0_0 = G[0][xy.y+0][xy.x+0]; s0_0_1 = G[0][xy.y+0][xy.x+1];
	s0_0_2 = G[0][xy.y+0][xy.x+2]; s0_1_0 = G[0][xy.y+1][xy.x+0];
	s0_1_1 = G[0][xy.y+1][xy.x+1]; s0_1_2 = G[0][xy.y+1][xy.x+2];
	s0_2_0 = G[0][xy.y+2][xy.x+0]; s0_2_1 = G[0][xy.y+2][xy.x+1];
	s0_2_2 = G[0][xy.y+2][xy.x+2]; s1_0_0 = G[1][xy.y+0][xy.x+0];
	s1_0_1 = G[1][xy.y+0][xy.x+1]; s1_0_2 = G[1][xy.y+0][xy.x+2];
	s1_1_0 = G[1][xy.y+1][xy.x+0]; s1_1_1 = G[1][xy.y+1][xy.x+1];
	s1_1_2 = G[1][xy.y+1][xy.x+2]; s1_2_0 = G[1][xy.y+2][xy.x+0];
	s1_2_1 = G[1][xy.y+2][xy.x+1]; s1_2_2 = G[1][xy.y+2][xy.x+2];
	// Plane 0 contributions: one 4x4 matrix per window position.
	r0 += M4(9.440e-03, 1.458e-03, -6.458e-03, -2.700e-03, 5.549e-03, -7.639e-04, 2.096e-03, -8.989e-04, 2.640e-02, -1.410e-03, 1.869e-02, 1.409e-03, 1.023e-01, 2.487e-02, -1.416e-02, 3.629e-02) * s0_0_0;
	r0 += M4(1.509e-01, 8.438e-02, 1.143e-02, 7.978e-03, -5.750e-02, -4.674e-03, 3.891e-04, -1.231e-02, -1.345e-01, -1.840e-02, 2.323e-02, -1.079e-02, 1.333e-01, 1.919e-01, 1.433e-02, 2.509e-02) * s0_0_1;
	r0 += M4(2.302e-03, 5.351e-02, 8.480e-03, 6.431e-03, 2.905e-03, 2.131e-02, 3.915e-03, -1.758e-02, 3.072e-03, -3.438e-02, -3.509e-05, 1.477e-02, 8.195e-03, 2.424e-02, 5.391e-04, 1.488e-02) * s0_0_2;
	r0 += M4(7.642e-02, -1.506e-02, 4.578e-02, 6.141e-03, 4.553e-02, -7.810e-03, 3.074e-02, -1.146e-02, 2.712e-02, 4.883e-03, 4.200e-02, -2.749e-03, 8.652e-02, -5.921e-02, -1.753e-01, -3.594e-02) * s0_1_0;
	r0 += M4(-3.115e-01, 1.107e-01, 1.518e-01, 2.172e-01, -3.271e-01, 1.567e-01, -3.639e-01, 7.900e-02, 1.036e-01, 1.448e-01, -1.876e-01, 1.294e-01, -1.135e-01, 1.738e-01, -6.591e-02, -4.482e-01) * s0_1_1;
	r0 += M4(1.008e-02, -2.001e-01, -3.162e-02, -1.160e-01, 1.728e-02, 1.439e-01, 1.267e-02, 1.849e-01, -7.903e-03, -7.609e-02, -4.156e-03, -1.518e-01, 1.712e-02, -2.836e-02, -2.087e-02, 2.260e-02) * s0_1_2;
	r0 += M4(1.232e-02, 1.138e-02, 1.824e-02, -4.395e-04, -2.328e-03, -2.045e-03, 2.178e-02, 1.811e-03, -2.592e-03, -1.386e-03, 4.552e-03, 4.235e-04, -3.959e-03, -3.804e-04, -1.466e-02, -1.121e-02) * s0_2_0;
	r0 += M4(2.040e-02, -2.045e-02, -1.567e-01, -8.699e-02, 1.209e-02, -3.504e-02, -2.003e-02, 2.460e-02, 1.098e-03, -1.320e-03, 7.594e-02, 3.979e-02, 9.844e-03, -1.918e-02, 1.771e-02, 6.114e-02) * s0_2_1;
	r0 += M4(4.157e-04, 7.436e-03, 2.223e-04, -2.286e-02, -2.104e-03, 9.648e-03, 1.889e-03, 1.958e-02, -1.945e-03, -3.176e-03, 1.402e-02, 5.585e-03, -4.020e-03, -3.422e-03, -1.357e-02, -1.231e-02) * s0_2_2;
	// Plane 1 contributions.
	r0 += M4(-9.219e-04, 1.067e-02, 6.003e-03, 4.274e-03, 1.317e-02, 1.407e-02, 1.695e-02, 5.354e-04, 7.108e-03, 1.250e-03, 3.536e-03, 1.130e-03, -2.529e-01, -5.931e-02, 2.179e-02, -6.267e-03) * s1_0_0;
	r0 += M4(-1.188e-02, 3.071e-02, 5.405e-03, -9.395e-03, -7.347e-03, -1.450e-02, 3.142e-02, 3.383e-02, 6.958e-02, 6.564e-02, -2.168e-03, -6.023e-03, -3.457e-02, -2.212e-01, -3.443e-02, 2.412e-02) * s1_0_1;
	r0 += M4(3.835e-03, -1.222e-03, -6.219e-04, -1.726e-03, 1.053e-02, 1.544e-02, -6.418e-03, 1.120e-02, -4.585e-03, 1.540e-02, -3.284e-03, -2.096e-03, -1.085e-02, -2.486e-02, 1.199e-03, -2.206e-02) * s1_0_2;
	r0 += M4(1.880e-01, -2.005e-02, 9.356e-02, -6.820e-03, -1.041e-02, 2.891e-02, -1.238e-02, 2.985e-02, -8.709e-02, 2.214e-02, -8.611e-02, 9.603e-03, -7.273e-04, 1.492e-02, 2.154e-01, 8.375e-02) * s1_1_0;
	r0 += M4(9.604e-02, -3.090e-01, -3.613e-03, -7.105e-02, -1.194e-01, -1.199e-01, -1.168e-01, -1.322e-01, 4.389e-02, -2.666e-01, 1.958e-01, 6.058e-03, 1.357e-02, -5.326e-03, 6.519e-02, 1.595e-01) * s1_1_1;
	r0 += M4(-3.195e-03, -7.204e-03, 7.797e-04, -5.749e-03, 4.041e-02, 3.136e-03, 4.230e-02, 5.178e-03, 1.800e-02, 1.284e-01, 1.478e-02, 9.075e-02, -6.532e-04, 7.743e-03, 7.242e-03, 1.729e-02) * s1_1_2;
	r0 += M4(4.188e-02, 1.297e-02, 1.209e-01, 4.563e-03, 2.177e-02, 3.521e-03, 1.837e-02, 1.764e-02, 8.617e-03, 1.867e-03, -2.678e-03, 1.357e-02, 1.339e-03, 7.034e-04, -4.800e-04, -3.468e-03) * s1_2_0;
	r0 += M4(-4.626e-02, -1.761e-03, 3.884e-02, -1.890e-01, 3.670e-02, 4.163e-02, -3.976e-03, -6.805e-04, -2.983e-02, 2.953e-02, -1.042e-01, -1.275e-01, 4.059e-04, 6.681e-04, 1.813e-03, -2.442e-03) * s1_2_1;
	r0 += M4(1.841e-04, 5.195e-03, -2.239e-04, 3.067e-03, -7.669e-03, 7.809e-03, 6.520e-03, 1.144e-02, 5.097e-03, -9.792e-03, 4.629e-03, 3.235e-02, -4.693e-04, 6.857e-04, 1.249e-03, 7.149e-03) * s1_2_2;
	// The s0_* variables are reused here to hold the 3x3 window of plane 2.
	s0_0_0 = G[2][xy.y+0][xy.x+0]; s0_0_1 = G[2][xy.y+0][xy.x+1];
	s0_0_2 = G[2][xy.y+0][xy.x+2]; s0_1_0 = G[2][xy.y+1][xy.x+0];
	s0_1_1 = G[2][xy.y+1][xy.x+1]; s0_1_2 = G[2][xy.y+1][xy.x+2];
	s0_2_0 = G[2][xy.y+2][xy.x+0]; s0_2_1 = G[2][xy.y+2][xy.x+1];
	s0_2_2 = G[2][xy.y+2][xy.x+2];
	// Plane 2 contributions.
	r0 += M4(-4.696e-03, -2.235e-03, -2.791e-03, -2.194e-03, -1.833e-02, -1.581e-02, 5.379e-04, -1.388e-02, 5.286e-02, -1.501e-02, -6.261e-02, -7.594e-03, -2.580e-02, 4.535e-03, -8.267e-03, 8.031e-03) * s0_0_0;
	r0 += M4(6.454e-03, 2.442e-03, -8.540e-05, 8.841e-04, 2.841e-02, 6.067e-02, 3.903e-02, -1.573e-02, 2.436e-01, 2.725e-01, -3.018e-01, -3.434e-01, 1.569e-02, -1.343e-01, -2.169e-02, 2.065e-02) * s0_0_1;
	r0 += M4(-5.644e-03, -2.650e-03, -2.462e-03, -4.319e-03, -4.638e-02, -6.754e-02, 2.064e-02, -2.172e-02, 9.204e-03, 6.314e-02, 1.391e-02, -1.852e-02, -6.803e-03, 2.286e-02, -8.599e-03, 5.785e-04) * s0_0_2;
	r0 += M4(2.435e-02, 1.320e-03, -3.189e-03, 1.138e-03, -5.201e-02, 2.589e-02, 4.576e-02, -1.926e-02, 1.763e-03, 4.594e-03, 2.996e-02, 1.729e-02, -1.821e-01, -3.079e-03, -1.220e-01, 4.117e-03) * s0_1_0;
	r0 += M4(-9.798e-03, 5.708e-03, -3.612e-03, -1.162e-02, 2.359e-01, -2.764e-01, -4.033e-01, 4.334e-02, -3.732e-03, -3.111e-03, 3.083e-02, 3.689e-02, 1.859e-01, 9.787e-02, 1.922e-01, -2.378e-01) * s0_1_1;
	r0 += M4(1.447e-02, 3.129e-02, 3.177e-04, 5.861e-03, -9.554e-03, 1.665e-01, 2.331e-02, 1.331e-02, 1.948e-03, -7.511e-04, 1.151e-02, 3.967e-02, -2.507e-03, 3.308e-02, -2.168e-04, 5.515e-02) * s0_1_2;
	r0 += M4(-4.432e-02, 7.455e-03, 9.602e-03, -1.349e-02, -1.484e-02, 1.011e-02, 1.215e-02, -8.993e-03, -1.493e-03, -1.009e-03, -5.513e-04, -9.491e-04, -1.443e-03, -5.219e-03, -5.041e-02, -1.353e-02) * s0_2_0;
	r0 += M4(-2.705e-01, -3.268e-01, 2.529e-01, 2.607e-01, -1.693e-02, 3.220e-02, 1.566e-01, 4.674e-02, 1.344e-03, 7.439e-04, 8.556e-04, 1.329e-03, 1.046e-02, 8.395e-03, 4.775e-02, 1.428e-01) * s0_2_1;
	r0 += M4(2.118e-02, -1.406e-02, -5.702e-04, 4.942e-02, -9.092e-04, -8.394e-04, -3.924e-04, 2.301e-03, -4.209e-04, -9.198e-04, -7.673e-04, -1.429e-03, -1.101e-03, -1.320e-03, -7.397e-04, 1.053e-02) * s0_2_2;
	// Bias term.
	r0 += V4(-4.856e-12, -1.403e-10, 5.258e-11, -1.187e-10);
	// Self-assignment: leaves r0 unchanged (no clamp is applied in this pass).
	r0 = r0;
	// opt is half of LUMA_pt; fpos is the centre of output pixel opos scaled by opt.
	// NOTE(review): presumably LUMA_pt is the normalized size of one LUMA texel, making
	// opt the normalized step between output pixels — confirm against the mpv docs.
	vec2 opt = 0.5 * LUMA_pt;
	vec2 fpos = (vec2(opos) + vec2(0.5)) * opt;
	// Scatter r0.x/.y/.z/.w to the 2x2 block in the order (0,0), (1,0), (0,1), (1,1),
	// adding the red channel of LUMA_tex sampled at the matching output pixel position.
	imageStore(out_image, opos + ivec2(0, 0), vec4(r0.x + LUMA_tex(fpos + vec2(0.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 0), vec4(r0.y + LUMA_tex(fpos + vec2(1.0, 0.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(0, 1), vec4(r0.z + LUMA_tex(fpos + vec2(0.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
	imageStore(out_image, opos + ivec2(1, 1), vec4(r0.w + LUMA_tex(fpos + vec2(1.0, 1.0) * opt).r, 0.0, 0.0, 1.0));
}
